# ANES_stateYoungAugmentation.R

# Part of the replication archive for 
#
#   Bullock, John G. 2020. "Education and Attitudes toward Redistribution in
#   the United States." British Journal of Political Science 50.



# In the cumulative ANES, the state in which a respondent grew up (or lived
# at about age 14) is missing for several years.  The code in this file fills
# in the missing data with responses from related questions.


# Overwrite and fill in ANES_obj$stateYoung using the individual ANES
# time-series studies and, as a last resort, state of birth.
#
# Args:
#   ANES_obj: data-frame-like object (presumably the cumulative ANES).  The
#             code reads its ID.unique, yearInt, age, stateOfBirth and
#             stateYoung columns and writes only stateYoung.
#
# Returns:
#   ANES_obj, with stateYoung
#     * overwritten for respondents found in the 1974-1984 and 2004
#       time-series files (matched on ID.unique),
#     * overwritten for the yearInt == 2002 rows (current state when the
#       respondent has lived in the community since about age 14, else NA),
#     * and finally, wherever it is still NA, set to stateOfBirth.
#
# Side effects:
#   Attaches car, dplyr and haven; sources 'functions/NES_StateRecode.R'
#   (which must define NES_StateRecode()); downloads eight zip archives from
#   electionstudies.org into temporary files, which are deleted after reading.
ANES_stateYoungAugmentation <- function (ANES_obj) {
  # library() rather than require(): a missing package must stop the run
  # instead of surfacing later as an obscure "could not find function" error.
  library(car)    # for recode()
  library(dplyr)  # attached as before; later code may rely on %>%
  library(haven)  # for read_dta()
  if (!requireNamespace("stringr", quietly = TRUE)) {
    stop("Package 'stringr' is required (for str_pad()).", call. = FALSE)
  }
  source('functions/NES_StateRecode.R')


  ############################################################################
  # HELPERS
  ############################################################################

  # Download one zipped Stata file and read the named member into memory.
  # The temporary zip file is removed as soon as the data have been read.
  read_anes_zip <- function(spec) {
    zip_path <- tempfile(fileext = ".zip")
    on.exit(unlink(zip_path), add = TRUE)
    status <- download.file(
      url      = spec[["url"]],
      destfile = zip_path,
      mode     = "wb")   # zip archives must be written in binary mode
    if (status != 0) {
      stop("Download failed for ", spec[["url"]], call. = FALSE)
    }
    haven::read_dta(unz(zip_path, spec[["member"]]))
  }

  # Build the identifier used in ANES_obj$ID.unique: the study year followed
  # by the case ID left-padded with zeros to four characters.
  make_unique_id <- function(year, case_id) {
    padded <- stringr::str_pad(case_id, width = 4, side = 'left', pad = '0')
    as.numeric(paste0(year, padded))
  }

  # Overwrite obj$stateYoung for every row whose ID.unique appears in `ids`
  # with the corresponding element of `states`.  Rows are matched on ID, so
  # neither the row order nor the coverage of the two sources has to agree.
  # If an ID occurs more than once in `ids`, its first occurrence is used.
  overwrite_state_young <- function(obj, ids, states) {
    usable <- !is.na(ids)            # an NA id can never identify a row
    ids    <- ids[usable]
    states <- states[usable]
    rows   <- which(obj$ID.unique %in% ids)
    if (length(rows) > 0) {
      obj$stateYoung[rows] <- states[match(obj$ID.unique[rows], ids)]
    }
    obj
  }

  # City codes used by the 1978, 1980 and 1982 studies -> state
  # abbreviations.  See the "CITIES NOTE" appendices of those codebooks.
  city_code_spec <-
    '1:14="AL"; 200:207="AZ"; 300:309="AR"; 400:556="CA"; 600:612="CO";
      700:733="CT"; 800="DE"; 900="DC"; 1000:1031="FL"; 1100:1112="GA";
      1300:1303="ID"; 1400:1460="IL"; 1500:1520="IN"; 1600:1615="IA";
      1700:1709="KS"; 1800:1810="KY"; 1900:1999="LA"; 2000:2099="ME";
      2100:2199="MD"; 2200:2299="MA"; 2300:2399="MI"; 2400:2499="MN";
      2500:2599="MS"; 2600:2699="MO"; 2700:2799="MT"; 2800:2899="NE";
      2900:2999="NV"; 3000:3099="NH"; 3100:3199="NJ"; 3200:3299="NM";
      3300:3399="NY"; 3400:3499="NC"; 3500:3599="ND"; 3600:3699="OH";
      3700:3799="OK"; 3800:3899="OR"; 3900:3999="PA"; 4000:4099="RI";
      4100:4199="SC"; 4200:4299="SD"; 4300:4399="TN"; 4400:4499="TX";
      4500:4599="UT"; 4600:4699="VT"; 4700:4799="VA"; 4800:4899="WA";
      4900:4999="WV"; 5000:5099="WI"; 5100:5199="WY"; else=NA'

  # Recode a vector of city codes to a factor of state abbreviations.  "WY"
  # is added to the levels so that it is available even when no city code
  # maps to it.
  recode_city_codes <- function(city_codes) {
    states <- car::recode(city_codes, city_code_spec, as.factor = TRUE)
    levels(states) <- c(levels(states), "WY")
    states
  }

  # Where `states` is NA, take the value from `small_town_states`, restricted
  # to the levels that `states` already has (anything else becomes NA).
  fill_from_small_towns <- function(states, small_town_states) {
    fallback   <- factor(small_town_states, levels = levels(states))
    unresolved <- is.na(states)
    states[unresolved] <- fallback[unresolved]
    states
  }


  ############################################################################
  # DOWNLOAD ANES TIME-SERIES DATASETS (1974-1984, 2002, 2004)
  ############################################################################
  # The code below downloads the ANES time-series studies from their home at
  # https://electionstudies.org. The ANES does not provide persistent links
  # (for example, DOI-based links) to any of its files, and the links included
  # in the code below will stop working when the ANES changes the URLs. When
  # that happens, you need only update the corresponding `url` entry below to
  # correct the problem. Of course, you can also download the ANES files
  # "by hand" and replace the code below with commands which load the datasets
  # that you've downloaded.  [2019 07 14]
  #   For more on the benefit to political science of using persistent links,
  # see http://doi.org/10.1017/S1049096516002353.
  anes_files <- list(
    "1974" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/07/anes1974dta.zip',
      member = 'NES1974.dta'),
    "1976" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1976dta.zip',
      member = 'NES1976.dta'),
    "1978" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1978dta.zip',
      member = 'nes1978.dta'),
    "1980" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1980dta.zip',
      member = 'NES1980.dta'),
    "1982" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1982dta.zip',
      member = 'NES1982.dta'),
    "1984" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes1984dta.zip',
      member = 'NES1984.dta'),
    "2002" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes2002TSdta.zip',
      member = 'anes2002TS.dta'),
    "2004" = c(
      url    = 'https://electionstudies.org/wp-content/uploads/2018/06/anes2004TSdta.zip',
      member = 'anes2004TS.dta'))
  anes <- lapply(anes_files, read_anes_zip)



  ############################################################################
  # CODE STATE OF RESIDENCE AT "ABOUT 14 YEARS OLD"
  ############################################################################
  # The stateYoung variable is based on a question about "where you
  # grew up."  But what we really want to know is where people were
  # living when they were 14 years old.  Between 1974 and 1984,
  # respondents were asked where they lived "when you were about 14
  # years old."  So I overwrite the stateYoung values with the answers
  # to the "14" question for 1974-1984 respondents.

  # 1974.
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1974', anes[["1974"]]$V742002),
    states = NES_StateRecode(anes[["1974"]]$V742542, twoDigit = TRUE))

  # 1976.  See "CENSUS COUNTY CODES" in the appendix to the 1976
  # codebook.  The first two characters of V763501 are used as the state code.
  # NOTE(review): the recode spec below uses unquoted codes (01, 04, ...)
  # while substr() returns character values; confirm that codes with a
  # leading zero are matched as intended.
  state_code_1976 <- substr(anes[["1976"]]$V763501, 0, 2)
  state_1976 <- car::recode(
    state_code_1976,
    '01="AL"; 04="AZ"; 05="AR"; 06="CA"; 08="CO"; 09="CT";
      10="DE"; 11="DC"; 12="FL"; 13="GA"; 16="ID"; 17="IL";
      18="IN"; 19="IA"; 20="KS"; 21="KY"; 22="LA"; 23="ME"; 24="MD";
      25="MA"; 26="MI"; 27="MN"; 28="MS"; 29="MO"; 30="MT"; 31="NE";
      32="NV"; 33="NH"; 34="NJ"; 35="NM"; 36="NY"; 37="NC"; 38="ND";
      39="OH"; 40="OK"; 41="OR"; 42="PA"; 44="RI"; 45="SC"; 46="SD";
      47="TN"; 48="TX"; 49="UT"; 50="VT"; 51="VA"; 53="WA"; 54="WV";
      55="WI"; 56="WY"; else=NA',
    as.factor = TRUE)
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1976', anes[["1976"]]$V763002),
    states = state_1976)

  # 1978.  V780624 has NA for a lot of people who reported living in
  # small towns when they were 14 years old.  Their states of residence
  # at age 14 are in V780626.
  state_1978      <- recode_city_codes(anes[["1978"]]$V780624)
  small_town_1978 <- car::recode(
    anes[["1978"]]$V780626,
    'c("AK", "FC", "PR", "YN", "") = NA',
    as.factor = TRUE)
  unresolved_1978 <- is.na(state_1978)
  state_1978[unresolved_1978] <- small_town_1978[unresolved_1978]
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1978', anes[["1978"]]$V780002),
    states = state_1978)

  # 1980.  See "1980 CITIES NOTE" in the appendix to the 1980 codebook.
  #   V800711 has NA for a lot of people who reported living in small
  # towns when they were 14 years old.  Their states of residence at age
  # 14 are in V800713.
  state_1980 <- fill_from_small_towns(
    recode_city_codes(anes[["1980"]]$V800711),
    anes[["1980"]]$V800713)
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1980', anes[["1980"]]$V800004),
    states = state_1980)

  # 1982.   See "1982 CITIES NOTE" in the appendix to the 1982 codebook.
  #   V820753 has NA for a lot of people who reported living in
  # small towns when they were 14 years old.  Their states of residence
  # at age 14 are in V820755.
  state_1982 <- fill_from_small_towns(
    recode_city_codes(anes[["1982"]]$V820753),
    anes[["1982"]]$V820755)
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1982', anes[["1982"]]$V820004),
    states = state_1982)

  # 1984.  See "1984 CITIES NOTE" and "1984 STATE AND COUNTRY NOTE" in
  # the appendix to the 1984 codebook.
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('1984', anes[["1984"]]$V840004),
    states = NES_StateRecode(anes[["1984"]]$V840701))



  ############################################################################
  # 2004 ANES
  ############################################################################
  # State of residence "when young" is in the 2004 time series, so we
  # just plug it in here.  Values are matched to ANES_obj on ID.unique rather
  # than by position, so the result does not depend on the two files listing
  # the 2004 respondents in the same order.
  ANES_obj <- overwrite_state_young(
    ANES_obj,
    ids    = make_unique_id('2004', anes[["2004"]]$V040001),
    states = NES_StateRecode(anes[["2004"]]$V043307))



  ##############################################################################
  # 2002 ANES - A SPECIAL CASE
  ##############################################################################
  # State of residence for 2002 isn't in the cumulative ANES, but it is in the
  # 2002 time-series data.  So I use it to impute residence at 14 for some
  # respondents: those whose age minus years lived in the community is at
  # most 14 are assigned their current state; everyone else gets NA.
  #
  # Most subjects in the 2002 ANES dataset were previously interviewed for the
  # 2000 ANES dataset.  (See ANES2002$V021001.)
  #
  # NOTE(review): this block pairs the yearInt == 2002 rows of ANES_obj with
  # the rows of the 2002 time-series file by position; it assumes both have
  # the same respondents in the same order -- confirm.
  anes_2002          <- anes[["2002"]]
  state_current_2002 <- anes_2002$V021201b  # State of residence at time of interview.
  years_in_comm_2002 <- car::recode(anes_2002$V023139, '999=NA')
  years_unknown      <- is.na(years_in_comm_2002)
  years_in_comm_2002[years_unknown] <-
    car::recode(anes_2002$V023139a, '888:999=NA')[years_unknown]
  is_2002         <- ANES_obj$yearInt == 2002
  same_comm_since <- which(ANES_obj$age[is_2002] - years_in_comm_2002 <= 14)
  state_yr14_2002 <- rep(NA, nrow(anes_2002))
  state_yr14_2002[same_comm_since] <- state_current_2002[same_comm_since]
  ANES_obj$stateYoung[is_2002] <- state_yr14_2002



  ##############################################################################
  # IMPUTE STATE OF RESIDENCE AT 14 FROM STATE OF BIRTH
  ##############################################################################
  needs_imputation <- is.na(ANES_obj$stateYoung)
  if (interactive()) {
    # Values computed inside a function are not auto-printed, so report the
    # diagnostics explicitly.
    message("stateOfBirth is NA for ",
            sum(is.na(ANES_obj$stateOfBirth)), " rows")
    message("stateYoung imputed from stateOfBirth for ",
            sum(needs_imputation & !is.na(ANES_obj$stateOfBirth)), " rows")
  }
  ANES_obj$stateYoung[needs_imputation] <- ANES_obj$stateOfBirth[needs_imputation]



  ##############################################################################
  # RETURN ANES OBJECT
  ##############################################################################
  ANES_obj
}
 